{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# The file to generate the results reported by actually performing SVM on gram matrices generated earlier"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### The naming convention for a grammian matrix is {alpha}'kmat'{num_examples}'.pickle''"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "from sklearn import svm,cross_validation\n",
    "import cPickle\n",
    "import math"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['0.1kmat736.pickle', '0.2kmat736.pickle', '0.3kmat736.pickle', '0.4kmat736.pickle', '0.5kmat736.pickle', '0.6kmat736.pickle', '0.7kmat736.pickle', '0.8kmat736.pickle', '1.0kmat736.pickle', '1.1kmat736.pickle']\n",
      "233 503\n"
     ]
    }
   ],
   "source": [
    "\n",
    "x_mat  = cPickle.load(open('kmat736.pickle','rb'))\n",
    "y_mat = cPickle.load(open('y736.pickle','rb'))\n",
    "\n",
    "Y_mat = np.array(y_mat)\n",
    "Y_mat[Y_mat!=1]=0\n",
    "print Y_mat.sum() , Y_mat.size - Y_mat.sum()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Normalization Function\n",
    "import math\n",
    "\n",
    "def normalized_val(x,i,j):\n",
    "    return x[i][j]/math.sqrt(x[i][i] * x[j][j])\n",
    "def normalize(x):\n",
    "        return [[normalized_val(x,i,j) for j in range(len(x[i]))] for i in range(len(x))]\n",
    "    \n",
    "def cross_fold_validation(datas,C_list = [1] , cross_fold = 5):\n",
    "    acc_c = []\n",
    "    f1_c = []\n",
    "    for j,c in enumerate(C_list):\n",
    "        svc = svm.SVC(C=c, kernel='precomputed')\n",
    "        f1_l = []\n",
    "        acc_l = []\n",
    "        for i in range(len(datas)):\n",
    "            data = datas[i]\n",
    "            f1 = cross_validation.cross_val_score(svc, data, Y_mat, cv=cross_fold, n_jobs=-1,scoring='f1')\n",
    "            acc= cross_validation.cross_val_score(svc, data, Y_mat, cv=cross_fold, n_jobs=-1)\n",
    "            f1_l.append(f1)\n",
    "            acc_l.append(acc)\n",
    "            print \"c = \" , c , \",,ith data = \" , i , \" ,, f1.mean => \" , f1.mean(), \"  , acc.mean => \" , \n",
    "            print acc.mean()\n",
    "        acc_c.append(acc_l)\n",
    "        f1_c.append(f1_l)\n",
    "    return acc_c, f1_c\n",
    "c_list = [1]\n",
    "# print   normalize(x_mat)[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2. load the grammian matrices for all alphas - Contiguous2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['data/0.1kmat736.pickle', 'data/0.2kmat736.pickle', 'data/0.3kmat736.pickle', 'data/0.4kmat736.pickle', 'data/0.5kmat736.pickle', 'data/0.6kmat736.pickle', 'data/0.7kmat736.pickle', 'data/0.8kmat736.pickle', 'data/0.9kmat736.pickle', 'data/1.0kmat736.pickle', 'data/1.1kmat736.pickle']\n",
      "11\n",
      "736\n",
      "736\n"
     ]
    }
   ],
   "source": [
    "\n",
    "# datas is a list of gramian matrix in list of lists\n",
    "files = ['data_grammian/0.' + str(i)+ 'kmat736.pickle' for i in range(1,10) ]\n",
    "files+= ['data_grammian/1.'+ str(i) + 'kmat736.pickle' for i in range(0,2)]\n",
    "print files\n",
    "data_contiguous = [cPickle.load(open(name,'rb')) for name in files]\n",
    "print len(data_contiguous)\n",
    "# Each is 736 by 736 matrix\n",
    "print len(data_contiguous[0])\n",
    "print len(data_contiguous[0][0]) "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "un_normalized_data_contiguous = [np.array(data) for data in data_contiguous]\n",
    "normalized_data_contiguous = [np.array(normalize(data)) for data in data_contiguous ]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2.1 Best over different alphas for same c = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "print \"unnormalized\"\n",
    "c_list = [1]\n",
    "un_norm = cross_fold_validation(un_normalized_data_contiguous,C_list = c_list)\n",
    "print \"normalized\"\n",
    "norm = cross_fold_validation(un_normalized_data_contiguous,C_list = c_list)\n",
    "\n",
    "# print un_norm\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2 Search over different C for same alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "c_list = [1e-5,1e-2 , 1e-1 , 1, 10]\n",
    "best_index = 4\n",
    "print \"normalized\"\n",
    "un_norm = cross_fold_validation([un_normalized_data_contiguous[best_index]],C_list = c_list)\n",
    "print \"normalized\"\n",
    "norm = cross_fold_validation([un_normalized_data_contiguous[best_index]],C_list = c_list)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. load the grammian matrices for all alphas - Sparse"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false,
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "['0.1kmatSparse736.pickle', '0.2kmatSparse736.pickle', '0.3kmatSparse736.pickle', '0.4kmatSparse736.pickle', '0.5kmatSparse736.pickle', '0.6kmatSparse736.pickle', '0.7kmatSparse736.pickle', '0.8kmatSparse736.pickle', '0.9kmatSparse736.pickle', '1.0kmatSparse736.pickle', '1.1kmatSparse736.pickle']\n",
      "11\n",
      "736\n",
      "736\n"
     ]
    }
   ],
   "source": [
    "# Sparse\n",
    "# datas is a list of gramian matrix in list of lists\n",
    "\n",
    "files = ['data_grammian/0.' + str(i)+ 'kmatSparse736.pickle' for i in range(1,10) ]\n",
    "files+= ['data_grammian/1.'+ str(i) + 'kmatSparse736.pickle' for i in range(0,2)]\n",
    "print files\n",
    "data_sparse = [cPickle.load(open(name,'rb')) for name in files]\n",
    "print len(data_sparse)\n",
    "print len(data_sparse[0])\n",
    "print len(data_sparse[0][0])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "un_normalized_data_sparse = [np.array(data) for data in data_sparse]\n",
    "normalized_data_sparse = [np.array(normalize(data)) for data in data_sparse ]\n",
    "# print un_normalized_datas[0][0,:]\n",
    "# print normalized_datas[0][0,:]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.1 Best over different alphas for same c = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "print \"unnormalized\"\n",
    "c_list = [1]\n",
    "un_norm = cross_fold_validation(un_normalized_data_sparse,C_list = c_list)\n",
    "print \"normalized\"\n",
    "norm = cross_fold_validation(normalized_data_sparse,C_list = c_list)\n",
    "\n",
    "# print un_norm\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 3.2 Search over different C for same alpha"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "c_list = [1e-5,1e-2 , 1e-1 , 1, 10]\n",
    "best_index = 0\n",
    "print \"normalized\"\n",
    "un_norm = cross_fold_validation([un_normalized_data_sparse[best_index]],C_list = c_list)\n",
    "print \"normalized\"\n",
    "norm = cross_fold_validation([normalized_data_sparse[best_index]],C_list = c_list)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Results for proposed Changed Kmatrix"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "here\n",
      "unnormalized\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c =  1e-05 ,,ith data =  0  ,, f1.mean =>  0.0   , acc.mean =>  0.683432062199\n",
      "c =  0.01 ,,ith data =  0  ,, f1.mean =>  0.907567211135   , acc.mean =>  0.94155868197\n",
      "c =  0.1 ,,ith data =  0  ,, f1.mean =>  0.889006676705   , acc.mean =>  0.929285449833\n",
      "c =  1 ,,ith data =  0  ,, f1.mean =>  0.871450984296   , acc.mean =>  0.917049241022\n",
      "c =  10 ,,ith data =  0  ,, f1.mean =>  0.871450984296   , acc.mean =>  0.917049241022\n",
      "normalized\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c =  1e-05 ,,ith data =  0  ,, f1.mean =>  0.0   , acc.mean =>  0.683432062199\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n",
      "/home/asus/misc/anaconda/lib/python2.7/site-packages/sklearn/metrics/metrics.py:1771: UndefinedMetricWarning: F-score is ill-defined and being set to 0.0 due to no predicted samples.\n",
      "  'precision', 'predicted', average, warn_for)\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "c =  0.01 ,,ith data =  0  ,, f1.mean =>  0.0   , acc.mean =>  0.683432062199\n",
      "c =  0.1 ,,ith data =  0  ,, f1.mean =>  0.909731860728   , acc.mean =>  0.942891521659\n",
      "c =  1 ,,ith data =  0  ,, f1.mean =>  0.890501208971   , acc.mean =>  0.929303961496\n",
      "c =  10 ,,ith data =  0  ,, f1.mean =>  0.878828233649   , acc.mean =>  0.922491669752\n"
     ]
    }
   ],
   "source": [
    "import numpy as np\n",
    "from sklearn import svm\n",
    "import cPickle\n",
    "from sklearn import  cross_validation\n",
    "\n",
    "x_mat  = cPickle.load(open('0.5changedkmat736.pickle','rb'))\n",
    "y_mat = cPickle.load(open('y736.pickle','rb'))\n",
    "# print y_mat\n",
    "\n",
    "\n",
    "K_mat = np.array(x_mat)\n",
    "K_mat_norm = np.array(normalize(x_mat))\n",
    "\n",
    "Y_mat = np.array(y_mat)\n",
    "Y_mat[Y_mat!=1]=0\n",
    "svc = svm.SVC(C=1, kernel='precomputed')\n",
    "print \"here\"\n",
    "\n",
    "c_list = [1e-5,1e-2 , 1e-1 , 1, 10]\n",
    "\n",
    "print \"unnormalized\"\n",
    "un_norm = cross_fold_validation([K_mat],C_list = c_list)\n",
    "print \"normalized\"\n",
    "norm = cross_fold_validation([K_mat_norm],C_list = c_list)\n",
    "\n",
    "# print acc.mean()\n",
    "\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
